import requests
import re

def geturls(
    url="http://www.qstheory.cn/dukan/qs/2014/2019-01/01/c_1123924172.htm",
    *,
    html=None,
    timeout=10,
):
    """Collect the ``dukan`` ``.htm`` links found on an index page.

    The page is scanned for ``href=`` attributes. A link is kept when its raw
    attribute value ends with ``htm"`` and contains ``dukan``. Double quotes
    are stripped from the kept values, and the last kept link is discarded.

    Args:
        url: Address of the index page to download. Ignored when *html* is
            supplied.
        html: Already-fetched page markup. When given, no network request is
            made and this text is parsed instead.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of URL strings in page order; empty when nothing matches or
        when only one link matches.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    if html is None:
        # Without a timeout a stalled server would block this call forever.
        res = requests.get(url, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of scraping an error page.
        res.raise_for_status()
        res.encoding = "utf-8"
        html = res.text

    # Each match is everything after ``href=`` up to the next whitespace, so
    # the surrounding double quotes are still attached at this point.
    raw_urls = re.findall(r"(?<=href=)\S+", html)
    urls = [
        raw.replace('"', "")
        for raw in raw_urls
        if raw.endswith('htm"') and "dukan" in raw
    ]

    # The final match is dropped, as the original code did — presumably it is
    # a non-article link on the page; TODO confirm against the live markup.
    # Guarded so that a page with no matches yields [] rather than IndexError.
    if urls:
        urls.pop()
    return urls


if __name__ == "__main__":
    # Script entry point: print every collected link on its own line.
    for link in geturls():
        print(link)